**This file cleans the HMDA data from 2007 to 2017
**Data can be downloaded at: https://www.consumerfinance.gov/data-research/hmda/historic-data/

*set the working directory; the relative paths used below resolve against it
cd ~/share_realestate/hmda

*rename_new_panel: give the raw reporter-panel variables underscore-separated names
*this renaming program only applies to reporter panels of year 2010 and after
*works on the dataset currently in memory; stops with an error if an expected raw variable is absent
*drop any earlier definition first so the do-file can be re-run in the same Stata session
capture program drop rename_new_panel
program define rename_new_panel
	rename activityyear year
	rename respondentid respondent_id
	rename agencycode agency_code
	rename parentrespondentid parent_id
	rename parentnamepanel parent_name
	rename parentcitypanel parent_city
	rename parentstatepanel parent_state
	*region and assets already carry their final names; only check that they are present
	confirm variable region assets
	rename otherlendercode other_lender_code
	rename respondentnamepanel respondent_name
	rename respondentcitypanel respondent_city
	rename respondentstatepanel respondent_state
	rename topholderrssdid topholder_rssdid
	rename topholdername topholder_name
	rename topholdercity topholder_city
	rename topholderstate topholder_state
	rename topholdercountry topholder_country
	rename respondentrssdid resp_rssdid
	rename parentrssdid parent_rssdid
	rename respondentfipsstatenumber resp_fips
end

**rename_new_panel_prior: same renaming, for panels 2007-2009 (different raw variable names)
*works on the dataset currently in memory; stops with an error if an expected raw variable is absent
*drop any earlier definition first so the do-file can be re-run in the same Stata session
capture program drop rename_new_panel_prior
program define rename_new_panel_prior
	rename activityyear year
	rename respondentid respondent_id
	rename agencycode agency_code
	rename parentid parent_id
	rename parentname parent_name
	rename parentcity parent_city
	rename parentstate parent_state
	*region and assets already carry their final names; only check that they are present
	confirm variable region assets
	rename otherlendercode other_lender_code
	rename respondentname respondent_name
	rename respondentaddress respondent_address
	rename respondentzipcode respondent_zipcode
	rename respondentcity respondent_city
	rename respondentstate respondent_state
end

**import the reporter panels and save one Stata file per year under ./<year>/
*panels 2010-2017 use the newer raw variable names
forvalues i=2010/2017{
	*the year folder must exist before anything is saved into it;
	*capture keeps this from failing when the folder is already there
	capture mkdir `i'
	import delimited using "./reporter_panel_raw/hmda_`i'_panel.csv",clear
	rename_new_panel
	*replace lets the do-file be re-run over earlier output
	save "./`i'/panel_`i'",replace
}

*panels 2007-2009 use the older raw variable names
forvalues i=2007/2009{
	capture mkdir `i'
	import delimited using "./reporter_panel_raw/hmda_`i'_panel.csv",clear
	rename_new_panel_prior
	save "./`i'/panel_`i'",replace
}

**clean the loan application records (lar) files
forvalues i=2007/2017{

	*the folder normally exists already from the panel loops above; capture makes this a no-op then
	capture mkdir `i'
	import delimited using "./all_records_raw/hmda_`i'_nationwide_all-records_codes.csv",clear

	rename as_of_year year
	rename loan_amount_000s loan_amount
	rename msamd msa
	rename applicant_income_000s income

	*all_records_<year> keeps every record; loans_<year> keeps only those with action_taken==1
	save "./`i'/all_records_`i'",replace
	keep if action_taken==1
	save "./`i'/loans_`i'",replace
}

**storage types of the same variable often differ between the yearly files
*for each year, print the year and then list its numeric and its string variables
forvalues yr=2007/2017{
	use "./`yr'/loans_`yr'",clear
	display `yr'
	foreach kind in numeric string{
		display "`kind'"
		ds, has(type `kind') varwidth(30)
	}
}

*because variable types are inconsistent across years, set standard types for variables
*numvars are forced to numeric and strvars to string in every yearly loans file
local numvars "year agency_code loan_type property_type loan_purpose owner_occupancy loan_amount preapproval action_taken applicant_ethnicity co_applicant_ethnicity applicant_race_1 co_applicant_race_1 applicant_sex co_applicant_sex purchaser_type hoepa_status lien_status sequence_number application_date_indicator"

local strvars "respondent_id msa state_code county_code census_tract_number applicant_race_2 applicant_race_3 applicant_race_4 applicant_race_5 co_applicant_race_2 co_applicant_race_3 co_applicant_race_4 co_applicant_race_5 income denial_reason_1 denial_reason_2 denial_reason_3 rate_spread edit_status population minority_population hud_median_family_income tract_to_msamd_income number_of_owner_occupied_units number_of_1_to_4_family_units"

*do for year 2007 to 2017
forvalues i=2007/2017{
	use "./`i'/loans_`i'",clear
	foreach var of local strvars{
		*convert only when the variable exists and is numeric in this year
		capture confirm numeric variable `var'
		if _rc==0{
			tostring `var',force replace
			*tostring writes numeric missing as the string "."; blank it so that
			*missing is "" in every year, including years where the variable was already string
			replace `var'="" if `var'=="."
		}
	}

	foreach var of local numvars{
		*convert only when the variable exists and is string in this year
		capture confirm string variable `var'
		if _rc==0{
			*show the non-numeric values that destring, force is about to set to missing
			tab `var' if real(`var')==.
			destring `var',force replace
		}
	}
	*overwrite the yearly loans file that was just loaded
	save,replace
}

*append all years of loans together
clear
forvalues i=2007/2017{
	di `i'
	append using "./`i'/loans_`i'"
}
*check the agency codes by year, then drop records without one
tab agency_code year,m
drop if agency_code==.
*replace is needed for re-runs: a later save,replace in this do-file leaves loans07_17 on disk
save loans07_17,replace


*bring in TYPE from the avery file, matching on year, respondent_id and agency_code
merge m:1 year respondent_id agency_code using avery_panel, keepusing(TYPE)
tabulate year _merge
*retain only the records found in both files
keep if _merge==3
drop _merge
*only 1990 and 1991 have non-numeric codes
*sb is 1 for TYPE 40 or 41 and 0 otherwise; sb2 is 1 for TYPE 40 only
*both are left missing where TYPE is .
generate sb=inlist(TYPE,40,41) if TYPE!=.
generate sb2=(TYPE==40) if TYPE!=.
tabulate TYPE sb, missing
drop if sb==.

*write the result back over the file the data in memory is associated with
save, replace
